## CLEANING US ##
library(haven)
library(dplyr)
library(janitor)

# Folder that holds the raw wave files. It is defined once and reused for
# every read, and it also becomes the working directory so that the relative
# write_sav() paths used later in this script land in the same folder.
data_dir <- "C:/Users/yuxua/Documents/careers/DEL intern/data deposit"
setwd(data_dir)

# Read the five US wave files from data_dir.
US_W1 <- read_sav(file.path(data_dir, "UCL_DEL_Panel_US_Wave1_client.sav"))
US_W2 <- read_sav(file.path(data_dir, "UCL_DEL_Panel_US_Wave2_client.sav"))
US_W3 <- read_sav(
  file.path(data_dir, "UCL_DEL_Panel_US_Wave3_client_DELseg.sav")
)
US_W4 <- read_sav(file.path(data_dir, "UCL_DEL_Panel_US_Wave4_client.sav"))
US_W5 <- read_sav(file.path(data_dir, "UCL_DEL_Panel_US_Wave5_client.sav"))

##### update US_W1
# Print the column names for a manual check.
names(US_W1)
# all missing variables are in, except presvote16postx (unimportant)

# Harmonise the demographic column names and add a _w1 suffix to the
# ben1 / cost1 / moral1 items.
US_W1 <- rename(
  US_W1,
  education = educ,
  income = profile_gross_household,
  religion = religpew,
  ben1_w1 = ben1,
  ben1_w1_1 = ben1_1,
  ben1_w1_2 = ben1_2,
  ben1_w1_3 = ben1_3,
  ben1_w1_4 = ben1_4,
  cost1_w1 = cost1,
  cost1_w1_1 = cost1_1,
  cost1_w1_2 = cost1_2,
  cost1_w1_3 = cost1_3,
  cost1_w1_4 = cost1_4,
  moral1_w1 = moral1
)

# Frequency checks before recoding (the notes record what was observed).
table(US_W1$purpose_w1) # 1-12, shift down by one
table(US_W1$cost5_w1) # 1-12, shift down by one
table(US_W1$trust1_w1) # 0-10 and 12, move 12 to 11
table(US_W1$trust2_w1) # 0-10 and 12, move 12 to 11
table(US_W1$trust3_w1) # 0-10 and 12, move 12 to 11
table(US_W1$dem1_w1) # 1-12, shift down by one
table(US_W1$dnut_w1) # 1-12, shift down by one

# Shift the 1-12 items down by one.
US_W1 <- US_W1 %>%
  mutate(across(c(purpose_w1, cost5_w1, dem1_w1, dnut_w1), ~ .x - 1))

# Trust items: convert to plain numeric first, then move code 12 to 11.
US_W1 <- US_W1 %>%
  mutate(across(
    c(trust1_w1, trust2_w1, trust3_w1),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Tracking items: only track9 needs the 12 -> 11 recode.
table(US_W1$track6_w1) # 0-11
table(US_W1$track8_w1_1) # 0-11
table(US_W1$track8_w1_2) # 0-11
table(US_W1$track8_w1_3) # 0-11
table(US_W1$track8_w1_4) # 0-11
table(US_W1$track8_w1_5) # 0-11
table(US_W1$track9_w1) # 0-10 and 12, move 12 to 11
table(US_W1$track10_w1) # 0-11

US_W1 <- US_W1 %>%
  mutate(track9_w1 = dplyr::recode(as.numeric(track9_w1), `12` = 11))

# Added after checking the codebook: eff1 and leftrt get the same
# numeric conversion and 12 -> 11 recode.
table(US_W1$eff1_w1)
table(US_W1$leftrt_w1)

US_W1 <- US_W1 %>%
  mutate(across(
    c(eff1_w1, leftrt_w1),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Save the cleaned wave next to the inputs (path is relative to the
# working directory).
write_sav(US_W1, "UCL_DEL_Panel_US_Wave1_client_new.sav")



##### update US_W2
# Print the column names for a manual check.
names(US_W2)
# all missing variables are in

# Harmonise the demographic column names and add a _w2 suffix to the
# two "important_*" items.
US_W2 <- rename(
  US_W2,
  education = educ,
  income = profile_gross_household,
  religion = religpew,
  important_mental_w2 = important_mental,
  important_aged_w2 = important_aged
)

# Frequency checks before recoding (the notes record what was observed).
table(US_W2$purpose_w2) # 1-12, shift down by one
table(US_W2$cost5_w2) # 1-12, shift down by one
table(US_W2$trust1_w2) # 0-10 and 12, move 12 to 11
table(US_W2$trust2_w2) # 0-10 and 12, move 12 to 11
table(US_W2$trust3_w2) # 0-10 and 12, move 12 to 11
table(US_W2$dem1_w2) # 1-12, shift down by one
table(US_W2$dnut_w2) # 1-12, shift down by one

# Shift the 1-12 items down by one.
US_W2 <- US_W2 %>%
  mutate(across(c(purpose_w2, cost5_w2, dem1_w2, dnut_w2), ~ .x - 1))

# Trust items: convert to plain numeric first, then move code 12 to 11.
US_W2 <- US_W2 %>%
  mutate(across(
    c(trust1_w2, trust2_w2, trust3_w2),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Tracking items: only track9 needs the 12 -> 11 recode.
table(US_W2$track6_w2) # 0-11
table(US_W2$track8_w2_1) # 0-11
table(US_W2$track8_w2_2) # 0-11
table(US_W2$track8_w2_3) # 0-11
table(US_W2$track8_w2_4) # 0-11
table(US_W2$track8_w2_5) # 0-11
table(US_W2$track9_w2) # 0-10 and 12, move 12 to 11
table(US_W2$track10_w2) # 0-11

US_W2 <- US_W2 %>%
  mutate(track9_w2 = dplyr::recode(as.numeric(track9_w2), `12` = 11))

# Added after checking the codebook.
table(US_W2$tradeoff_w2) # 1-12, shifted down by one below
US_W2 <- US_W2 %>%
  mutate(tradeoff_w2 = tradeoff_w2 - 1)

table(US_W2$eff1_w2) # 0-10 and 12, move 12 to 11
table(US_W2$leftrt_w2) # 0-10 and 12, move 12 to 11
US_W2 <- US_W2 %>%
  mutate(across(
    c(eff1_w2, leftrt_w2),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Save the cleaned wave (path is relative to the working directory).
write_sav(US_W2, "UCL_DEL_Panel_US_Wave2_client_new.sav")


##### update US_W3
# Print the column names for a manual check.
names(US_W3)
# all missing variables are in

# Harmonise the demographic column names.
US_W3 <- rename(
  US_W3,
  education = educ,
  income = profile_gross_household,
  religion = religpew
)

# Frequency checks before recoding (the notes record what was observed).
table(US_W3$purpose_w3) # 1-12, shift down by one
table(US_W3$cost5_w3) # 1-12, shift down by one
table(US_W3$trust1_w3) # 0-10 and 12, move 12 to 11
table(US_W3$trust2_w3) # 0-10 and 12, move 12 to 11
table(US_W3$trust3_w3) # 0-10 and 12, move 12 to 11
table(US_W3$dem1_w3) # 1-12, shift down by one
table(US_W3$dnut_w3) # 1-12, shift down by one

# Shift the 1-12 items down by one.
US_W3 <- US_W3 %>%
  mutate(across(c(purpose_w3, cost5_w3, dem1_w3, dnut_w3), ~ .x - 1))

# Trust items: convert to plain numeric first, then move code 12 to 11.
US_W3 <- US_W3 %>%
  mutate(across(
    c(trust1_w3, trust2_w3, trust3_w3),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Tracking items: only track9 needs the 12 -> 11 recode.
table(US_W3$track6_w3) # 0-11
table(US_W3$track8_w3_1) # 0-11
table(US_W3$track8_w3_2) # 0-11
table(US_W3$track8_w3_3) # 0-11
table(US_W3$track8_w3_4) # 0-11
table(US_W3$track8_w3_5) # 0-11
table(US_W3$track9_w3) # 0-10 and 12, move 12 to 11
table(US_W3$track10_w3) # 0-11

US_W3 <- US_W3 %>%
  mutate(track9_w3 = dplyr::recode(as.numeric(track9_w3), `12` = 11))

# Added after checking the codebook.
table(US_W3$eff1_w3) # 0-10 and 12, move 12 to 11
table(US_W3$leftrt_w3) # 0-10 and 12, move 12 to 11
US_W3 <- US_W3 %>%
  mutate(across(
    c(eff1_w3, leftrt_w3),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Save the cleaned wave (path is relative to the working directory).
write_sav(US_W3, "UCL_DEL_Panel_US_Wave3_client_DELseg_new.sav")



##### update US_W4
# Print the column names for a manual check.
names(US_W4)
# all missing variables are in

# Harmonise the demographic column names.
US_W4 <- US_W4 %>%
  rename(
    education = educ,
    income = profile_gross_household,
    religion = religpew
  )

# Inspect religion only after the rename: the column is created from
# religpew, so reading US_W4$religion any earlier refers to a column that
# does not exist yet.
US_W4$religion

# Frequency checks before recoding (the notes record what was observed).
table(US_W4$purpose_w4) # 1-12, shift down by one
table(US_W4$cost5_w4) # 1-12, shift down by one
table(US_W4$trust1_w4) # 0-10 and 12, move 12 to 11
table(US_W4$trust2_w4) # 0-10 and 12, move 12 to 11
table(US_W4$trust3_w4) # 0-10 and 12, move 12 to 11
table(US_W4$dem1_w4) # 1-12, shift down by one
table(US_W4$dnut_w4) # 1-12, shift down by one

# Inspection only: the ccemot items are not modified in this script.
table(US_W4$ccemot_1)
table(US_W4$ccemot_2)
table(US_W4$ccemot_3)
table(US_W4$ccemot_4)
table(US_W4$ccemot_5)
table(US_W4$ccemot_6)

# Shift the 1-12 items down by one.
US_W4 <- US_W4 %>%
  mutate(across(c(purpose_w4, cost5_w4, dem1_w4, dnut_w4), ~ .x - 1))

# Trust items: convert to plain numeric first, then move code 12 to 11.
US_W4 <- US_W4 %>%
  mutate(across(
    c(trust1_w4, trust2_w4, trust3_w4),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Tracking items: only track9 needs the 12 -> 11 recode.
table(US_W4$track6_w4) # 0-11
table(US_W4$track8_w4_1) # 0-11
table(US_W4$track8_w4_2) # 0-11
table(US_W4$track8_w4_3) # 0-11
table(US_W4$track8_w4_4) # 0-11
table(US_W4$track8_w4_5) # 0-11
table(US_W4$track9_w4) # 0-10 and 12, move 12 to 11
table(US_W4$track10_w4) # 0-11

US_W4 <- US_W4 %>%
  mutate(track9_w4 = dplyr::recode(as.numeric(track9_w4), `12` = 11))

# Added after checking the codebook.
table(US_W4$eff1_w4) # 0-10 and 12, move 12 to 11
table(US_W4$leftrt_w4) # 0-10 and 12, move 12 to 11
US_W4 <- US_W4 %>%
  mutate(across(
    c(eff1_w4, leftrt_w4),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Print the DELseg column for a visual check.
US_W4$DELseg

# Save the cleaned wave (path is relative to the working directory).
write_sav(US_W4, "DEL_Panel_UnitedStates_Wave_4_2022_Data.sav")

##### update US_W5
# Print the column names for a manual check.
names(US_W5)
# all missing variables are in

# Harmonise the demographic column names and give the weight column its
# wave-specific name.
US_W5 <- US_W5 %>%
  rename(
    education = educ,
    income = profile_gross_household,
    religion = religpew,
    WeightPanelW5 = weight
  )

# Inspect religion only after the rename: the column is created from
# religpew, so reading US_W5$religion any earlier refers to a column that
# does not exist yet.
US_W5$religion

# Frequency checks before recoding (the notes record what was observed).
table(US_W5$purpose_w5) # 1-12, shift down by one
table(US_W5$cost5_w5) # 1-12, shift down by one
table(US_W5$trust1_w5) # 0-10 and 12, move 12 to 11
table(US_W5$trust2_w5) # 0-10 and 12, move 12 to 11
table(US_W5$trust3_w5) # 0-10 and 12, move 12 to 11
table(US_W5$dem1_w5) # 1-12, shift down by one
table(US_W5$dnut_w5) # 1-12, shift down by one

# Inspection only: the ccemot items are not modified in this script.
table(US_W5$ccemot_1)
table(US_W5$ccemot_2)
table(US_W5$ccemot_3)
table(US_W5$ccemot_4)
table(US_W5$ccemot_5)
table(US_W5$ccemot_6)

# Shift the 1-12 items down by one.
# NOTE(review): the original script flagged dnut_w5 as "missing" - confirm
# whether that means the values are all NA in this wave.
US_W5 <- US_W5 %>%
  mutate(across(c(purpose_w5, cost5_w5, dem1_w5, dnut_w5), ~ .x - 1))

# Trust items: convert to plain numeric first, then move code 12 to 11.
US_W5 <- US_W5 %>%
  mutate(across(
    c(trust1_w5, trust2_w5, trust3_w5),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Tracking items: only track9 needs the 12 -> 11 recode.
table(US_W5$track6_w5) # 0-11
table(US_W5$track8_w5_1) # 0-11
table(US_W5$track8_w5_2) # 0-11
table(US_W5$track8_w5_3) # 0-11
table(US_W5$track8_w5_4) # 0-11
table(US_W5$track8_w5_5) # 0-11
table(US_W5$track9_w5) # 0-10 and 12, move 12 to 11
table(US_W5$track10_w5) # 0-11

US_W5 <- US_W5 %>%
  mutate(track9_w5 = dplyr::recode(as.numeric(track9_w5), `12` = 11))

# Added after checking the codebook.
table(US_W5$eff1_w5) # 0-10 and 12, move 12 to 11
table(US_W5$leftrt_w5) # 0-10 and 12, move 12 to 11
US_W5 <- US_W5 %>%
  mutate(across(
    c(eff1_w5, leftrt_w5),
    ~ dplyr::recode(as.numeric(.x), `12` = 11)
  ))

# Print the DELseg column for a visual check.
US_W5$DELseg

# Write wave 5 to its own file. The previous target was the wave 4 output
# name ("DEL_Panel_UnitedStates_Wave_4_2022_Data.sav"), which silently
# overwrote the cleaned wave 4 data.
# NOTE(review): the name follows the wave 1-3 "<input>_new.sav" pattern;
# switch to the deposit naming convention once the wave 5 year is confirmed.
write_sav(US_W5, "UCL_DEL_Panel_US_Wave5_client_new.sav")




#################################################################

# Print each wave's column names for a visual check.
names(US_W1)
names(US_W2)
names(US_W3)
names(US_W4)

# comparing dataframes for US
#
# For each wave: keep the columns from GUNQID through cos1_w<k>_4, then drop
# the wave suffix from every column except the first so that the same item
# carries the same name in every wave.
#
# The suffix is matched literally (fixed = TRUE). The earlier pattern
# "_w1*" was a regular expression meaning "_w followed by any number of 1s",
# so it also matched a bare "_w" (e.g. it turned "x_w1" into "x1" when
# applied with the wave 2 pattern).
#
# NOTE(review): wave 1 renames cost1_4 to cost1_w1_4 earlier in this script;
# confirm that cos1_w1_4 (not cost1_w1_4) is the intended end column.
US_W1_comp <- US_W1 %>%
  select(GUNQID:cos1_w1_4) %>%
  rename_with(~ sub("_w1", "", .x, fixed = TRUE), .cols = -1)
US_W2_comp <- US_W2 %>%
  select(GUNQID:cos1_w2_4) %>%
  rename_with(~ sub("_w2", "", .x, fixed = TRUE), .cols = -1)
US_W3_comp <- US_W3 %>%
  select(GUNQID:cos1_w3_4) %>%
  rename_with(~ sub("_w3", "", .x, fixed = TRUE), .cols = -1)
US_W4_comp <- US_W4 %>%
  select(GUNQID:cos1_w4_4) %>%
  rename_with(~ sub("_w4", "", .x, fixed = TRUE), .cols = -1)

# Tabulate the column classes per wave, side by side.
data.comp_US <- compare_df_cols(
  US_W1_comp, US_W2_comp, US_W3_comp, US_W4_comp
)


